LSTM AutoEncoder

気象情報をセンサデータと見立てて異常検知してみる。入力は気象庁の過去の天気情報で入手。1980年から2020年(06月まで)の毎日の天気情報。

In [1]:
import pandas as pd
import numpy as np

import os
from tqdm import tqdm

import tensorflow as tf
import pandas as pd
pd.options.mode.chained_assignment = None
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline
sns.set(style='whitegrid',palette ='muted')
rcParams['figure.figsize']=14, 8
np.random.seed(1)
# tf.random.set_seed(1)
print("TensorFlow Version: ",tf.__version__)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])
/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
/usr/local/lib/python3.6/dist-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])
TensorFlow Version:  1.14.0

データの準備

予め超かんたんなクレンジングを実行済み。input配下にCSVが入れてある状況。
サンプルとして一番初めの月だけ見てみる。その後全期間のデータをマージ、風向きデータのOneHotが面倒なので可視化後に除外。数値データだけ残して正規化する。
※最高気温、最低気温、平均気温など相関の強いカラムが多いので本来はカラムを絞ったり次元削減するべきだが、とりあえずのモデルなのでそこまではこだわらない。

In [2]:
INPUT_DIR = 'input/'
In [3]:
csv_files = os.listdir(INPUT_DIR)
csv_files.sort()
In [4]:
# とりあえず初めの月だけ表示してみる
input_path = os.path.join(INPUT_DIR, csv_files[0])
df = pd.read_csv(input_path)
df
Out[4]:
date precipitation_sum precipitation_max_hour precipitation_max_10minute temperature_mean temperature_max temperature_min wind_velocity_mean wind_velocity_max wind_velocity_max_direction maximum_instantaneous_wind_speed maximum_instantaneous_wind_speed_direction most_common_wind_direction solar_irradiation_hour snowfall snow_accumulation
0 1980/1/1 0.0 0.0 0.0 6.8 8.6 3.6 3.0 6.0 NNE 0.0 NaN NE 5.8 0.0 0.0
1 1980/1/2 3.0 2.0 0.0 5.3 8.2 2.1 2.3 6.0 NE 0.0 NaN NNE 5.7 0.0 0.0
2 1980/1/3 26.0 4.0 0.0 6.7 9.7 3.9 1.6 3.0 NNE 0.0 NaN NNE 0.1 0.0 0.0
3 1980/1/4 20.0 8.0 0.0 11.6 17.9 7.5 2.9 7.0 NE 0.0 NaN E 5.3 0.0 0.0
4 1980/1/5 3.0 2.0 0.0 5.1 8.4 2.4 3.2 8.0 NE 0.0 NaN NE 3.5 0.0 0.0
5 1980/1/6 0.0 0.0 0.0 7.5 12.1 1.6 4.0 9.0 SW 0.0 NaN SW 8.3 0.0 0.0
6 1980/1/7 0.0 0.0 0.0 7.8 12.2 0.4 6.4 12.0 WSW 0.0 NaN WSW 7.8 0.0 0.0
7 1980/1/8 0.0 0.0 0.0 2.8 5.9 0.0 1.6 4.0 NE 0.0 NaN NNE 9.2 0.0 0.0
8 1980/1/9 0.0 0.0 0.0 4.1 6.8 0.7 1.2 4.0 ENE 0.0 NaN NE 2.1 0.0 0.0
9 1980/1/10 2.0 1.0 0.0 6.9 10.9 1.4 3.3 9.0 WSW 0.0 NaN WSW 7.8 0.0 0.0
10 1980/1/11 0.0 0.0 0.0 4.6 7.2 2.1 2.0 4.0 NNE 0.0 NaN NNE 7.4 0.0 0.0
11 1980/1/12 0.0 0.0 0.0 5.3 8.7 1.9 2.2 4.0 SW 0.0 NaN NE 7.2 0.0 0.0
12 1980/1/13 22.0 3.0 0.0 3.2 5.2 1.5 2.0 3.0 NNE 0.0 NaN NNE 0.0 0.0 0.0
13 1980/1/14 0.0 0.0 0.0 7.5 10.9 3.0 4.7 8.0 WSW 0.0 NaN WSW 8.8 0.0 0.0
14 1980/1/15 0.0 0.0 0.0 6.2 9.3 2.0 4.5 7.0 WSW 0.0 NaN WSW 9.4 0.0 0.0
15 1980/1/16 0.0 0.0 0.0 6.6 8.6 4.0 6.0 10.0 WSW 0.0 NaN WSW 9.5 0.0 0.0
16 1980/1/17 0.0 0.0 0.0 7.3 10.5 5.0 7.3 11.0 SW 0.0 NaN WSW 8.5 0.0 0.0
17 1980/1/18 0.0 0.0 0.0 3.7 8.8 -0.2 1.6 4.0 NW 0.0 NaN NNE 8.5 0.0 0.0
18 1980/1/19 0.0 0.0 0.0 5.9 9.0 1.3 2.3 6.0 SSW 0.0 NaN SW 8.0 0.0 0.0
19 1980/1/20 0.0 0.0 0.0 9.8 11.6 7.7 5.8 9.0 WSW 0.0 NaN WSW 5.6 0.0 0.0
20 1980/1/21 0.0 0.0 0.0 5.8 9.7 1.9 3.9 8.0 WSW 0.0 NaN WSW 8.0 0.0 0.0
21 1980/1/22 0.0 0.0 0.0 3.1 7.8 -0.2 1.1 3.0 NE 0.0 NaN NNE 7.1 0.0 0.0
22 1980/1/23 0.0 0.0 0.0 5.0 8.6 0.0 2.1 6.0 WSW 0.0 NaN WSW 9.7 0.0 0.0
23 1980/1/24 0.0 0.0 0.0 7.0 10.5 2.4 2.6 5.0 NE 0.0 NaN NNE 7.3 0.0 0.0
24 1980/1/25 0.0 0.0 0.0 4.6 8.9 1.0 1.8 4.0 NE 0.0 NaN NNE 9.6 0.0 0.0
25 1980/1/26 0.0 0.0 0.0 6.3 11.5 1.1 1.3 3.0 ENE 0.0 NaN NNE 9.7 0.0 0.0
26 1980/1/27 0.0 0.0 0.0 9.5 14.2 1.6 3.2 7.0 SW 0.0 NaN SW 8.7 0.0 0.0
27 1980/1/28 57.0 15.0 0.0 13.9 16.9 12.4 2.5 5.0 SW 0.0 NaN SW 1.4 0.0 0.0
28 1980/1/29 12.0 6.0 0.0 9.2 11.1 7.9 3.2 5.0 NE 0.0 NaN NNE 0.0 0.0 0.0
29 1980/1/30 30.0 5.0 0.0 7.4 9.9 5.7 2.5 4.0 NNE 0.0 NaN NNE 0.3 0.0 0.0
30 1980/1/31 0.0 0.0 0.0 7.9 11.0 4.2 3.8 10.0 WSW 0.0 NaN WSW 8.5 0.0 0.0
In [5]:
# 全データを結合
df = pd.DataFrame(columns=df.columns)

for csv_file in tqdm(csv_files):
    input_path = os.path.join(INPUT_DIR, csv_file)
    tmp_df = pd.read_csv(input_path, parse_dates=['date'])
    df = pd.concat([df, tmp_df])
df = df.fillna('').reset_index(drop=True)
100%|██████████| 486/486 [00:04<00:00, 118.48it/s]
In [6]:
# 風向きはいらない
direction_cols = ['wind_velocity_max_direction', 'maximum_instantaneous_wind_speed_direction', 'most_common_wind_direction']
train_cols = [col for col in df.columns.tolist() if not (col in direction_cols)]
df = df[train_cols]
In [7]:
train_size = int(len(df)*0.85)
test_size = len(df)-train_size
train,test = df.iloc[0:train_size],df.iloc[train_size:len(df)]
print(train.shape,test.shape)

feature_cols = train.drop('date', axis=1).columns.tolist()
(12573, 13) (2219, 13)

可視化

超大雑把に可視化しておく。残念ながら積雪や降雪の記録は取れていないらしい。三浦地方は大雪は少ないから異常がわかりやすいと思ったのに。。。
pandas-profilingがものすごくリッチに成長していてビックリ。先述の通り相関が高いものを警告したりしてくれてる。

In [8]:
for col in feature_cols:
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df['date'],y=df[col],mode='lines',name=col))
    fig.update_layout(showlegend=True)
    fig.show()
In [9]:
import pandas_profiling as pdp
pdp.ProfileReport(df)



Out[9]:

正規化

標準化、正規化は悩みどころだけど、あとで各カラムの予測値と実測値の差分を足し合わせる関係で正規化にしておく

In [10]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

X_train = train[feature_cols]
X_test = test[feature_cols]

scaler = MinMaxScaler()
scaler = scaler.fit(X_train)

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

モデル生成

あまり深くする必要もないのだが、せっかくなので多少は積んでおく。推論が遅くて使えなかったら考える。

In [11]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input,LSTM,Dense,Dropout,RepeatVector,TimeDistributed
from tensorflow.keras import regularizers

def build_lstm_ae(X_train):
    inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
    x = LSTM(256, activation='relu', return_sequences=True, 
             kernel_regularizer=regularizers.l2(0.00))(inputs)
    x = Dropout(0.1)(x)
    x = LSTM(128, activation='relu', return_sequences=True)(x)
    x = Dropout(0.1)(x)
    x = LSTM(64, activation='relu', return_sequences=True)(x)
    x = Dropout(0.1)(x)
    x = LSTM(32, activation='relu', return_sequences=True)(x)
    x = Dropout(0.1)(x)
    x = LSTM(16, activation='relu', return_sequences=False)(x)
    x = Dropout(0.1)(x)
    x = RepeatVector(X_train.shape[1])(x)
    x = LSTM(16, activation='relu', return_sequences=True)(x)
    x = Dropout(0.1)(x)
    x = LSTM(32, activation='relu', return_sequences=True)(x)
    x = Dropout(0.1)(x)
    x = LSTM(64, activation='relu', return_sequences=True)(x)
    x = Dropout(0.1)(x)
    x = LSTM(128, activation='relu', return_sequences=True)(x)
    x = Dropout(0.1)(x)
    x = LSTM(256, activation='relu', return_sequences=True)(x)
    output = TimeDistributed(Dense(X_train.shape[2]))(x)    
    model = Model(inputs=inputs, outputs=output)
    return model


model = build_lstm_ae(X_train)
model.compile(loss='mae',optimizer='adam',metrics=['accuracy'])
model.summary()
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 1, 12)]           0         
_________________________________________________________________
lstm (LSTM)                  (None, 1, 256)            275456    
_________________________________________________________________
dropout (Dropout)            (None, 1, 256)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 1, 128)            197120    
_________________________________________________________________
dropout_1 (Dropout)          (None, 1, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 1, 64)             49408     
_________________________________________________________________
dropout_2 (Dropout)          (None, 1, 64)             0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 1, 32)             12416     
_________________________________________________________________
dropout_3 (Dropout)          (None, 1, 32)             0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 16)                3136      
_________________________________________________________________
dropout_4 (Dropout)          (None, 16)                0         
_________________________________________________________________
repeat_vector (RepeatVector) (None, 1, 16)             0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 1, 16)             2112      
_________________________________________________________________
dropout_5 (Dropout)          (None, 1, 16)             0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 1, 32)             6272      
_________________________________________________________________
dropout_6 (Dropout)          (None, 1, 32)             0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 1, 64)             24832     
_________________________________________________________________
dropout_7 (Dropout)          (None, 1, 64)             0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 1, 128)            98816     
_________________________________________________________________
dropout_8 (Dropout)          (None, 1, 128)            0         
_________________________________________________________________
lstm_9 (LSTM)                (None, 1, 256)            394240    
_________________________________________________________________
time_distributed (TimeDistri (None, 1, 12)             3084      
=================================================================
Total params: 1,066,892
Trainable params: 1,066,892
Non-trainable params: 0
_________________________________________________________________
In [12]:
# AuroEncoderなのでラベルは不要。特徴量とラベルがおんなじって感じ。仮にノイズ除去モデルを作りたいなら第一引数をノイズを付与したX_trainにする
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=5,mode='min')
history = model.fit(
    X_train, X_train, epochs=100,
    batch_size=256, validation_split=0.1, callbacks=[es], shuffle=False
)
Train on 11315 samples, validate on 1258 samples
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/100
11315/11315 [==============================] - 6s 519us/sample - loss: 0.1810 - acc: 0.3326 - val_loss: 0.1665 - val_acc: 0.2424
Epoch 2/100
11315/11315 [==============================] - 1s 105us/sample - loss: 0.1007 - acc: 0.2681 - val_loss: 0.1272 - val_acc: 0.1025
Epoch 3/100
11315/11315 [==============================] - 1s 113us/sample - loss: 0.0895 - acc: 0.1317 - val_loss: 0.1251 - val_acc: 0.1367
Epoch 4/100
11315/11315 [==============================] - 1s 108us/sample - loss: 0.0894 - acc: 0.1547 - val_loss: 0.1253 - val_acc: 0.1367
Epoch 5/100
11315/11315 [==============================] - 1s 101us/sample - loss: 0.0896 - acc: 0.1547 - val_loss: 0.1255 - val_acc: 0.1367
Epoch 6/100
11315/11315 [==============================] - 1s 108us/sample - loss: 0.0895 - acc: 0.1547 - val_loss: 0.1256 - val_acc: 0.1367
Epoch 7/100
11315/11315 [==============================] - 1s 106us/sample - loss: 0.0893 - acc: 0.1547 - val_loss: 0.1257 - val_acc: 0.1367
Epoch 8/100
11315/11315 [==============================] - 1s 106us/sample - loss: 0.0894 - acc: 0.1546 - val_loss: 0.1261 - val_acc: 0.1367
In [13]:
plt.plot(history.history['loss'],label='Training Loss')
plt.plot(history.history['val_loss'],label='Validation Loss')
plt.legend()
Out[13]:
<matplotlib.legend.Legend at 0x7ff7681b6860>
In [14]:
model.evaluate(X_test,X_test)
2219/2219 [==============================] - 0s 123us/sample - loss: 0.1235 - acc: 0.1352
Out[14]:
[0.12345795948503253, 0.13519603]

異常検知

AutoEncoderなので予測値は実際の気象情報を変換して復元した値になる。復元した値と実際の値の差分を取って、差分が大きいところを異常とする。
気象情報なので多少は異常があると仮定しているが、仮に本物のセンサデータで異常発生時のデータが無い場合は差分の平均との差分が+3σより大きかったら異常とか、観測データの差分の最大値+αを超えたら異常って扱いにするのかな。
差分はMAEとMSEで見ているが、正規化しているから差分は1未満になるのでMAEよりMSEのほうが顕著に出ているように見える。

In [15]:
# plot the loss distribution of the training set
X_pred = model.predict(X_train)
X_pred = X_pred.reshape(X_pred.shape[0], X_pred.shape[2])
X_pred = pd.DataFrame(X_pred, columns=feature_cols)
X_pred.index = train.index

scored = pd.DataFrame(index=train.index)
Xtrain = X_train.reshape(X_train.shape[0], X_train.shape[2])
scored['Loss_mae'] = np.mean(np.abs(X_pred-Xtrain), axis = 1)
scored['Loss_mse'] = np.mean(np.power(X_pred-Xtrain, 2), axis=1)

plt.figure(figsize=(16,9), dpi=80)
plt.title('Loss Distribution', fontsize=16)
sns.distplot(scored['Loss_mae'], bins=50, kde=True, color='red', label='mae')
sns.distplot(scored['Loss_mse'], bins=50, kde=True, color='blue', label='mse')
plt.xlim([0.0,.3])
Out[15]:
(0.0, 0.3)
In [16]:
X_pred = model.predict(X_test)
X_pred = X_pred.reshape(X_pred.shape[0], X_pred.shape[2])
X_pred = pd.DataFrame(X_pred, columns=feature_cols)
X_pred.index = test.index

scored = pd.DataFrame(index=test.index)
Xtest = X_test.reshape(X_test.shape[0], X_test.shape[2])

scored['mae'] = np.mean(np.abs(X_pred-Xtest), axis = 1)
scored['mse'] = np.mean(np.power(X_pred-Xtest, 2), axis = 1)
plt.figure(figsize=(16,9), dpi=80)
plt.title('Test Error Distribution', fontsize=16)
sns.distplot(scored['mae'], bins=50, kde=True, color='red', label='mae')
sns.distplot(scored['mse'], bins=50, kde=True, color='blue', label='mse')
plt.xlim([0.0,.3])
Out[16]:
(0.0, 0.3)
In [17]:
X_test_pred = model.predict(X_test)
mse = np.mean(np.power(X_test - X_test_pred, 2), axis=1)
mse = np.sum(mse, axis=1)

mae = np.mean(np.abs(X_test - X_test_pred), axis = 1)
mae = np.sum(mae, axis=1)

test_df = test.copy()
test_df['mse'] = mse
test_df['mae'] = mae
In [18]:
# 全体のthreshold*100 % は正常値として扱い、それ以外を異常とする。そのための閾値を計算する。
threshold = 0.998
mse_threshold = np.quantile(mse, threshold)
mae_threshold = np.quantile(mae, threshold)
print('MSE {0} threshold:{1}'.format(threshold, mse_threshold))
print('MAE {0} threshold:{1}'.format(threshold, mae_threshold))
MSE 0.998 threshold:2.191145337802903
MAE 0.998 threshold:4.0436720852001145
In [19]:
display(test_df[test_df['mse'] > mse_threshold])
display(test_df[test_df['mae'] > mae_threshold])
date precipitation_sum precipitation_max_hour precipitation_max_10minute temperature_mean temperature_max temperature_min wind_velocity_mean wind_velocity_max maximum_instantaneous_wind_speed solar_irradiation_hour snowfall snow_accumulation mse mae
12697 2014-10-06 85.0 34.0 11.0 21.5 26.6 16.6 6.6 22.1 38.5 3.6 0.0 0.0 2.417163 4.216528
13383 2016-08-22 136.5 33.0 10.0 26.0 27.2 23.2 7.0 15.2 26.2 0.0 0.0 0.0 2.194841 4.501088
13785 2017-09-28 198.5 87.0 18.5 20.3 24.2 17.3 3.2 7.6 14.4 0.6 0.0 0.0 3.177929 4.321809
14496 2019-09-09 91.5 33.0 11.0 27.5 31.2 24.8 7.6 21.0 41.7 7.2 0.0 0.0 2.915647 4.872119
14529 2019-10-12 147.5 25.5 9.0 23.9 25.4 21.6 9.0 21.2 33.9 0.0 0.0 0.0 2.908776 4.901335
date precipitation_sum precipitation_max_hour precipitation_max_10minute temperature_mean temperature_max temperature_min wind_velocity_mean wind_velocity_max maximum_instantaneous_wind_speed solar_irradiation_hour snowfall snow_accumulation mse mae
12697 2014-10-06 85.0 34.0 11.0 21.5 26.6 16.6 6.6 22.1 38.5 3.6 0.0 0.0 2.417163 4.216528
13383 2016-08-22 136.5 33.0 10.0 26.0 27.2 23.2 7.0 15.2 26.2 0.0 0.0 0.0 2.194841 4.501088
13785 2017-09-28 198.5 87.0 18.5 20.3 24.2 17.3 3.2 7.6 14.4 0.6 0.0 0.0 3.177929 4.321809
14496 2019-09-09 91.5 33.0 11.0 27.5 31.2 24.8 7.6 21.0 41.7 7.2 0.0 0.0 2.915647 4.872119
14529 2019-10-12 147.5 25.5 9.0 23.9 25.4 21.6 9.0 21.2 33.9 0.0 0.0 0.0 2.908776 4.901335
In [20]:
test_df['mse_threshold'] = mse_threshold
fig = go.Figure()
fig.add_trace(go.Scatter(x=test_df['date'],y=test_df['mse'],mode='lines',name='mse'))
fig.add_trace(go.Scatter(x=test_df['date'],y=test_df['mse_threshold'],mode='lines',name='mse_threshold'))
fig.update_layout(showlegend=True)
fig.show()

test_df['mae_threshold'] = mae_threshold
fig = go.Figure()
fig.add_trace(go.Scatter(x=test_df['date'],y=test_df['mae'],mode='lines',name='mae'))
fig.add_trace(go.Scatter(x=test_df['date'],y=test_df['mae_threshold'],mode='lines',name='mae_threshold'))
fig.update_layout(showlegend=True)
fig.show()

fig = go.Figure()
fig.add_trace(go.Scatter(x=test_df['date'],y=test_df['mse'],mode='lines',name='mse'))
fig.add_trace(go.Scatter(x=test_df['date'],y=test_df['mse_threshold'],mode='lines',name='mse_threshold'))
fig.add_trace(go.Scatter(x=test_df['date'],y=test_df['mae'],mode='lines',name='mae'))
fig.add_trace(go.Scatter(x=test_df['date'],y=test_df['mae_threshold'],mode='lines',name='mae_threshold'))
fig.update_layout(showlegend=True)
fig.show()